library(readr)
library(SciViews)
library(scatterplot3d)
library(car)
## Loading required package: carData
library(lattice)
library(GGally)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(ggplot2)
library(ggridges)
library(ggvis)
##
## Attaching package: 'ggvis'
## The following object is masked from 'package:ggplot2':
##
## resolution
library(ggthemes)
library(cowplot)
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggthemes':
##
## theme_map
library(gapminder)
library(gganimate)
## No renderer backend detected. gganimate will default to writing frames to separate files
## Consider installing:
## - the `gifski` package for gif output
## - the `av` package for video output
## and restarting the R session
##
## Attaching package: 'gganimate'
## The following object is masked from 'package:ggvis':
##
## view_static
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble 3.1.8 ✔ stringr 1.4.1
## ✔ tidyr 1.2.1 ✔ forcats 0.5.2
## ✔ purrr 0.3.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ purrr::some() masks car::some()
library(grid)
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library(RColorBrewer)
# Read the Bumpus sparrows CSV into a tibble named `sparrows`.
sparrows <- read_csv(
  file = "/Users/ajayvishnu/Desktop/RUTGERS/Spring 2023/Multivariate Analysis/Datasets/Bumpus_sparrows.csv"
)
## Rows: 49 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Survivorship
## dbl (5): Total_length, Alar_extent, L_beak_head, L_humerous, L_keel_sternum
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the structure of the data: column names, types and leading values.
utils::str(sparrows)
## spc_tbl_ [49 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Survivorship : chr [1:49] "S" "S" "S" "S" ...
## $ Total_length : num [1:49] 156 154 153 153 155 163 157 155 164 158 ...
## $ Alar_extent : num [1:49] 245 240 240 236 243 247 238 239 248 238 ...
## $ L_beak_head : num [1:49] 31.6 30.4 31 30.9 31.5 32 30.9 32.8 32.7 31 ...
## $ L_humerous : num [1:49] 18.5 17.9 18.4 17.7 18.6 19 18.4 18.6 19.1 18.8 ...
## $ L_keel_sternum: num [1:49] 20.5 19.6 20.6 20.2 20.3 20.9 20.2 21.2 21.1 22 ...
## - attr(*, "spec")=
## .. cols(
## .. Survivorship = col_character(),
## .. Total_length = col_double(),
## .. Alar_extent = col_double(),
## .. L_beak_head = col_double(),
## .. L_humerous = col_double(),
## .. L_keel_sternum = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Side-by-side boxplots of the five numeric measurements (columns 2 to 6).
boxplot(sparrows[, 2:6])

# Star plot with one star per bird, labelled with its survival status.
# Only the numeric measurements are passed in: stars() converts a data frame
# with data.matrix(), which would recode the character Survivorship column
# as integers and draw it as an extra ray that separates the two groups by
# construction rather than by their measurements.
stars(sparrows[, 2:6], labels = sparrows$Survivorship)

- The str function gives a compact summary of the structure of the data
(column types and the first few values of each column).
- The boxplot is not very informative because the variables are
measured on very different scales.
- The stars function gives a more useful picture in this case: the
stars for survivors resemble one another, and the stars for
non-survivors resemble one another.
Plots
# NOTE(review): attach() is kept only because later statements in this file
# refer to the columns by bare name; prefer explicit `sparrows$...` access.
attach(sparrows)
# The five measurements preceded by a bird index taken from the row names.
sparr.birds <- data.frame(as.numeric(rownames(sparrows)),sparrows[,2:6])
# Panel labels, in the column order of sparr.birds.
labs.diagonal <- c("Bird","Total length","Alar extent","L. beak & head","L. humerous","L. keel & sternum")
# One plotting symbol per bird, chosen from its survival status: the first
# factor level gets an open circle (1), the second a filled circle (16).
# The original `pch = c(16, 1)` was recycled by row position, so the symbols
# alternated between birds and carried no information.
# Assumes Survivorship holds exactly two values ("NS" and "S") -- TODO confirm.
surv_pch <- c(1, 16)[as.integer(factor(sparrows$Survivorship))]
# Both axes use the same unit; the y label previously read "nm" (typo).
plot(
  sparrows$Total_length, sparrows$Alar_extent,
  xlab = "Total Length (mm)",
  ylab = "Alar extent (mm)",
  pch = surv_pch
)

# Scatterplot matrix of the five measurements.
pairs(sparrows[, 2:6])

- We use the pairs plot to look for correlations between the variables
in the data.
# Scatterplot matrix of the bird index and the five measurements, with a
# boxplot of each variable on the diagonal.
# The plotting symbol is selected per bird from its survival status (first
# factor level -> open circle, second -> filled circle). The original
# `pch = c(1, 16)` was recycled by row position, so the symbols alternated
# between birds instead of distinguishing the two groups.
# Assumes Survivorship holds exactly two values ("NS" and "S") -- TODO confirm.
pairs(
  sparr.birds,
  diag.panel = panel.boxplot,
  labels = labs.diagonal,
  pch = c(1, 16)[as.integer(factor(sparrows$Survivorship))],
  font.labels = 2
)

- This gives the pairwise scatterplots along with a boxplot of each
variable on the diagonal.
3-D Plots
# 3-D scatterplot of alar extent, total length and beak-and-head length,
# with vertical drop lines (type = "h").
# Survivorship is a character column, so as.numeric() on it returned NA for
# every bird (with a "NAs introduced by coercion" warning) and the symbols
# never followed the legend. Converting through factor() yields the integer
# codes 1 and 2, which select the open (1) and filled (16) symbols.
# Assumes the two levels sort as "NS" then "S", matching the legend order
# below -- TODO confirm against the data.
surv_code <- as.integer(factor(sparrows$Survivorship))
s3d <- scatterplot3d(
  sparrows$Alar_extent, sparrows$Total_length, sparrows$L_beak_head,
  pch = c(1, 16)[surv_code],
  xlab = "Alar extent", ylab = "", angle = 45,
  zlab = "Length of beak and head",
  lty.hide = 2, type = "h", y.margin.add = 0.1,
  font.axis = 2, font.lab = 2
)
# Place the legend at a point given in data coordinates.
legend(s3d$xyz.convert(238, 160, 34.1),c("Non-survivor","Survivor"),pch=c(1,16),text.font=2)

Scatterplot
# Grouped scatterplot matrix of the five measurements (car package).
# Changes from the previous version:
#  * warnings are no longer switched off globally around the call; the call
#    now uses the argument names of current car releases (`regLine`,
#    `legend`, `diagonal = list(method = ...)`), so there is nothing to hide;
#  * `data = sparrows` supplies the Survivorship grouping column directly
#    instead of relying on attach();
#  * the first entry of labs.diagonal ("Bird") is dropped so that the five
#    labels line up with the five variables in the formula.
scatterplotMatrix(
  ~ Total_length + Alar_extent + L_beak_head + L_humerous + L_keel_sternum |
    Survivorship,
  data = sparrows,
  var.labels = labs.diagonal[-1],
  cex.labels = 0.7,
  diagonal = list(method = "boxplot"),
  smooth = FALSE,
  regLine = FALSE,
  pch = c(1, 16),
  col = rep("black", 2),
  legend = FALSE
)
# Lattice scatterplot matrix grouped by survival status.
# trellis.par.get("superpose.symbol") already returns the symbol settings
# themselves, so pch/col are changed directly on that list and written back
# under the same name. The previous code stored them in a nested
# `superpose.symbol` element, which left `super.sym$pch` and `super.sym$col`
# at their defaults -- so the key showed different symbols from the panels.
super.sym <- trellis.par.get("superpose.symbol")
super.sym$pch <- c(1, 16, rep(1, 5))
super.sym$col <- rep("#000000", 7)
trellis.par.set("superpose.symbol", super.sym)
splom(
  ~sparr.birds,
  groups = sparrows$Survivorship,
  data = sparr.birds,
  ps = 0.5,
  varname.cex = .5,
  panel = panel.superpose,
  key = list(
    columns = 2,
    # The key reuses the first two symbols and colours set above.
    points = list(pch = super.sym$pch[1:2], col = super.sym$col[1:2]),
    text = list(c("Non-survivor", "Survivor"))
  )
)

# GGally scatterplot matrix of columns 2 to 6, coloured by Survivorship.
ggscatmat(data = sparrows, columns = 2:6, color = "Survivorship")

- None of the plots above supports a firm conclusion on its own.
- We therefore analyse the data further using ggplot2.
GG Plots
# Strip plots: each measurement against survival status, with the points
# coloured by status. The point layer is shared by all five plots.
status_points <- geom_point(aes(colour = Survivorship))

ggplot(sparrows, aes(x = Survivorship, y = Total_length)) + status_points

ggplot(sparrows, aes(x = Survivorship, y = Alar_extent)) + status_points

ggplot(sparrows, aes(x = Survivorship, y = L_beak_head)) + status_points

ggplot(sparrows, aes(x = Survivorship, y = L_keel_sternum)) + status_points

ggplot(sparrows, aes(x = Survivorship, y = L_humerous)) + status_points

# Survival status against total length, with one panel per distinct value of
# a second measurement.
# facet_wrap() expects a facet specification (vars(), a formula or column
# names). The previous code passed the bare column vectors, which only
# resolved because of attach() and is not a documented facet specification.
length_by_status <- ggplot(sparrows, aes(x = Total_length, y = Survivorship)) +
  geom_point()

length_by_status + facet_wrap(vars(Alar_extent))

length_by_status + facet_wrap(vars(L_beak_head))

length_by_status + facet_wrap(vars(L_humerous))

length_by_status + facet_wrap(vars(L_keel_sternum))

# Bar charts counting birds at each observed value of a measurement, with the
# bar outline coloured by survival status.
stacked_bars <- geom_bar(position = "stack")

ggplot(sparrows, aes(x = Total_length, colour = Survivorship)) + stacked_bars

ggplot(sparrows, aes(x = Alar_extent, colour = Survivorship)) + stacked_bars

ggplot(sparrows, aes(x = L_beak_head, colour = Survivorship)) + stacked_bars

ggplot(sparrows, aes(x = L_humerous, colour = Survivorship)) + stacked_bars

ggplot(sparrows, aes(x = L_keel_sternum, colour = Survivorship)) + stacked_bars

# The same counts, split into one column of panels per survival status.
status_columns <- facet_grid(. ~ Survivorship)
dodged_bars <- geom_bar(position = "dodge")

ggplot(sparrows, aes(x = Total_length, colour = Survivorship)) +
  status_columns +
  dodged_bars

ggplot(sparrows, aes(x = Alar_extent, colour = Survivorship)) +
  status_columns +
  dodged_bars

ggplot(sparrows, aes(x = L_beak_head, colour = Survivorship)) +
  status_columns +
  dodged_bars

ggplot(sparrows, aes(x = L_humerous, colour = Survivorship)) +
  status_columns +
  dodged_bars

ggplot(sparrows, aes(x = L_keel_sternum, colour = Survivorship)) +
  status_columns +
  dodged_bars

# Histograms of each measurement using the default binning (30 bins, as the
# recorded stat_bin() messages note).
plain_hist <- geom_histogram()

ggplot(sparrows, aes(x = Total_length)) + plain_hist
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(sparrows, aes(x = Alar_extent)) + plain_hist
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(sparrows, aes(x = L_beak_head)) + plain_hist
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(sparrows, aes(x = L_humerous)) + plain_hist
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(sparrows, aes(x = L_keel_sternum)) + plain_hist
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# The same histograms with each bar filled according to its count.
count_filled_hist <- geom_histogram(aes(fill = after_stat(count)))

ggplot(sparrows, aes(x = Total_length)) + count_filled_hist
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(sparrows, aes(x = Alar_extent)) + count_filled_hist
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(sparrows, aes(x = L_beak_head)) + count_filled_hist
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(sparrows, aes(x = L_humerous)) + count_filled_hist
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(sparrows, aes(x = L_keel_sternum)) + count_filled_hist
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Violin plots of each measurement, one violin per survival status.
violins <- geom_violin()

ggplot(sparrows, aes(x = Survivorship, y = Total_length)) + violins

ggplot(sparrows, aes(x = Survivorship, y = Alar_extent)) + violins

ggplot(sparrows, aes(x = Survivorship, y = L_beak_head)) + violins

ggplot(sparrows, aes(x = Survivorship, y = L_humerous)) + violins

ggplot(sparrows, aes(x = Survivorship, y = L_keel_sternum)) + violins

# Boxplots of each measurement, one box per survival status.
boxes <- geom_boxplot()

ggplot(sparrows, aes(x = Survivorship, y = Total_length)) + boxes

ggplot(sparrows, aes(x = Survivorship, y = Alar_extent)) + boxes

ggplot(sparrows, aes(x = Survivorship, y = L_beak_head)) + boxes

ggplot(sparrows, aes(x = Survivorship, y = L_humerous)) + boxes

ggplot(sparrows, aes(x = Survivorship, y = L_keel_sternum)) + boxes

# Overlaid density curves of each measurement by survival status, using the
# `scaled` density computed by the stat on the y axis.
# The computed variable is referenced with after_stat(), the form this file
# already uses for the histogram fills, in place of the older `..scaled..`
# notation.
scaled_density <- geom_density(alpha = 0.3, aes(y = after_stat(scaled)))

ggplot(sparrows, aes(x = Total_length, fill = Survivorship, color = Survivorship)) +
  scaled_density

ggplot(sparrows, aes(x = Alar_extent, fill = Survivorship, color = Survivorship)) +
  scaled_density

ggplot(sparrows, aes(x = L_beak_head, fill = Survivorship, color = Survivorship)) +
  scaled_density

ggplot(sparrows, aes(x = L_humerous, fill = Survivorship, color = Survivorship)) +
  scaled_density

ggplot(sparrows, aes(x = L_keel_sternum, fill = Survivorship, color = Survivorship)) +
  scaled_density

# Ridgeline density plots of each measurement, one ridge per survival status.
# The bandwidth messages recorded below are the ones emitted by each plot.
ridges <- geom_density_ridges()

ggplot(sparrows, aes(x = Total_length, y = Survivorship)) + ridges
## Picking joint bandwidth of 1.63

ggplot(sparrows, aes(x = Alar_extent, y = Survivorship)) + ridges
## Picking joint bandwidth of 2.27

ggplot(sparrows, aes(x = L_beak_head, y = Survivorship)) + ridges
## Picking joint bandwidth of 0.364

ggplot(sparrows, aes(x = L_humerous, y = Survivorship)) + ridges
## Picking joint bandwidth of 0.198

ggplot(sparrows, aes(x = L_keel_sternum, y = Survivorship)) + ridges
## Picking joint bandwidth of 0.401

# Hexagonal-bin plots of each measurement against survival status.
hex_bins <- geom_hex()

ggplot(sparrows, aes(x = Survivorship, y = Total_length)) + hex_bins

ggplot(sparrows, aes(x = Survivorship, y = Alar_extent)) + hex_bins

ggplot(sparrows, aes(x = Survivorship, y = L_beak_head)) + hex_bins

ggplot(sparrows, aes(x = Survivorship, y = L_humerous)) + hex_bins

ggplot(sparrows, aes(x = Survivorship, y = L_keel_sternum)) + hex_bins

# Keel-and-sternum length against alar extent, coloured by survival status,
# with the x axis limited to 220-260. The plot is stored once and then drawn
# with several themes for comparison.
lastplot <- ggplot(sparrows, aes(x = Alar_extent, y = L_keel_sternum)) +
  xlim(220, 260) +
  geom_point(aes(colour = Survivorship)) +
  labs(
    title = "Sparrows Analysis",
    x = "Alar_Extent",
    y = "L_keel_sternum"
  )

# Theme shipped with ggplot2.
lastplot + theme_bw()

# Theme from cowplot.
lastplot + theme_cowplot()

# Theme shipped with ggplot2.
lastplot + theme_dark()

# Themes from ggthemes.
lastplot + theme_economist()

lastplot + theme_fivethirtyeight()

lastplot + theme_tufte()

lastplot + theme_wsj()

- Based on all the plots, we can infer that the birds whose measurements
lie at the extremes did not survive.
- The birds that survived had measurements in the average range.
- Among the birds that did not survive, all of those with measurements
at the extremes died.
- However, we cannot confidently state a range of measurements within
which a bird will die or survive.